
## Figure B.6.
## Annual count of articles about the headscarf with endogenous peaks

# Setup -------------------------------------------------------------------

# Set working directory.
# Fill in the project folder below. While the value is still the empty
# placeholder the call is skipped, because setwd() cannot change to an empty
# path; the script then runs relative to the current working directory.
project_dir <- ""
if (nzchar(project_dir)) {
  setwd(project_dir)
}

# Load packages.
# Attach order matters: plyr comes after tidyverse, so plyr's verbs (e.g.
# summarise) mask the dplyr ones for the rest of the script.
packs <- c("foreign", "data.table", "ggplot2", "tidyverse", "stringr", "stringi", "haven", 
           "readxl", "lubridate", "stats", "ggthemes", "plyr", "RColorBrewer", "scales", "peakPick")

# Fail fast, naming every package that is not installed, instead of letting
# require() return FALSE silently and the script break further down.
packs_missing <- packs[!vapply(packs, requireNamespace, logical(1), quietly = TRUE)]
if (length(packs_missing) > 0) {
  stop("Missing required packages: ", paste(packs_missing, collapse = ", "),
       call. = FALSE)
}
invisible(lapply(packs, library, character.only = TRUE))

# Read dataset of articles about the headscarf and convert it to a data.table
dt.xl <- read.csv(file = "news_articles.csv")
dt.xl <- data.table(dt.xl)

# Folder in which graphs are saved (placeholder; fill in before running)
graphpath <- ""


# Creating Policy Dataset -------------------------------------------------------
## Policymaking events and their dates, taken from Bowen (2007):
  # Stasi committee installed: July 3, 2003
  # Stasi report completed: December 11, 2003
  # National Assembly votes for law: February 10, 2004
  # Law passed: March 15, 2004
  # Education ministry issues administrative order for implementation: May 18, 2004
  # Implementation: September 2, 2004

# One row per policy event: `xint` holds the date as a "YYYY-MM-DD" string,
# `event` holds the short label for that date.
policy_dates <- data.frame(
  xint = c("2003-07-03", "2003-12-11", "2004-02-10",
           "2004-03-15", "2004-05-18", "2004-09-02"),
  event = c("Stasi Commission", "Stasi Report", "Assembly Vote",
            "Law Passed", "Ministry Order", "Implementation")
)
# Keep the dates as plain character strings regardless of how data.frame()
# treated them on creation.
policy_dates$xint <- as.character(policy_dates$xint)


# Creating Summary Datasets -------------------------------------------------------

## Keep only the rows whose `source` is Le Monde
lemonde <- dt.xl[source == "Le Monde"]

## Tally the Le Monde articles per year: one row per `date_year`, with the
## number of rows in that year stored in `n_articles`
annual_dt_lm <- plyr::ddply(
  lemonde,
  "date_year",
  plyr::summarise,
  n_articles = length(date_year)
)


# Plot Annual break points graph, Le Monde -------------------------------------------------------

## Flag spike years in the annual counts with peakPick's detect.spikes(),
## using a minimum of 3 standard deviations and a window length of 3
annual_tst_lm <- annual_dt_lm

# Rows 4 to 27 of the annual table are scanned.
# NOTE(review): these bounds look chosen so a 3-row window fits on each side of
# a 30-row table -- confirm against the actual number of years in the data.
spike_rows <- c(4, 27)
spike_window <- 3

annualhits_lm <- detect.spikes(
  mat = as.matrix(annual_tst_lm),
  roi = spike_rows,
  winlen = spike_window,
  spike.min.sd = 3
)

## Collect the years flagged as spikes. Column 2 of the result corresponds to
## n_articles (column 1 is date_year itself).
spike_idx <- which(annualhits_lm[, 2])
break_points_lm <- data.frame(break_points_lm = annual_dt_lm$date_year[spike_idx])

## Build graph: annual Le Monde article counts as a line, with one dashed
## vertical line (and legend entry) per detected break-point year, saved as
## annual-break-lemonde.jpg inside `graphpath`.

# While graphpath is still the empty placeholder, write to the current
# directory instead of aiming the file at the filesystem root.
plot_dir <- as.character(graphpath)
if (!nzchar(plot_dir)) {
  plot_dir <- "."
}
plot_file <- file.path(plot_dir, "annual-break-lemonde.jpg")

# Dark2 palette: the original 8 colours when there are at most 8 break points,
# interpolated to more colours otherwise so the manual scale never runs short.
n_break_colors <- max(8, nrow(break_points_lm))
break_palette <- colorRampPalette(brewer.pal(8, "Dark2"))(n_break_colors)

annual_break_plot <- ggplot(annual_dt_lm, aes(x = date_year, y = n_articles)) +
  geom_line() +
  geom_vline(data = break_points_lm,
             aes(xintercept = break_points_lm, color = as.factor(break_points_lm)),
             linetype = "dashed") +
  labs(x = "Year", y = "Number of Articles about Headscarf") +
  scale_color_manual(values = break_palette, name = "Break Points:") +
  scale_x_continuous(breaks = seq(1990, 2018, 2), limits = c(1990, 2019)) +
  # theme_few() is a complete theme and replaces every earlier theme() setting,
  # so it must come before the axis-text and legend tweaks.
  theme_few() +
  theme(axis.text = element_text(size = 12),
        axis.title = element_text(size = 16),
        legend.position = "bottom")

# The quartz backend exists only on macOS builds; elsewhere use the platform
# default JPEG backend.
if (isTRUE(capabilities("aqua")[["aqua"]])) {
  jpeg(filename = plot_file, width = 1200, height = 700, type = "quartz", res = 130)
} else {
  jpeg(filename = plot_file, width = 1200, height = 700, res = 130)
}
# print() explicitly so the plot is drawn even when the script is source()d,
# and always close the device, even if rendering fails.
invisible(tryCatch(print(annual_break_plot), finally = dev.off()))


